home *** CD-ROM | disk | FTP | other *** search
/ Enter 2002 August / EnterCD 8_2002.iso / Internet / Adobe GoLive 6.0 / data1.cab / PF_AppDir_Mod_PageGenerator / UrlReplaceDownload.java < prev   
Encoding:
Java Source  |  2002-03-28  |  6.6 KB  |  238 lines

  1. // ------------------------------------------------------------------
  2. //                                                                        
  3. // Purpose.............:    UrlReplaceDownload
  4. // Created.............:    May 8, 2001
  5. // Copyright...........:    (c) 2001 by Adobe Systems
  6. //
  7. // ------------------------------------------------------------------
  8.  
  9. import java.io.*;
  10. import java.util.ResourceBundle;
  11. import java.util.Locale;
  12. import java.net.URL;
  13. import java.net.HttpURLConnection;
  14. import java.net.URLConnection;
  15. import org.apache.regexp.*;
  16.  
  17. /**
  18.  * This class perform download URL and save to localdisk with replacing links.
  19.  * For example:
  20.  * <pre>
  21.  *   UrlReplaceDownload urlReplaceDownload = new UrlReplaceDownload(savePath);
  22.  *   urlReplaceDownload.replaceDownload(urlList);
  23.  * </pre>
  24.  *
  25.  * @author  Adobe Systems, Inc.
  26.  * @since 1.0
  27.  */
  28. public class UrlReplaceDownload {
  29.     /** Saving directory path */
  30.     static String savePath;
  31.  
  32.     /**
  33.      * Constructor with no parameter
  34.      * @param    void
  35.      * @return    void
  36.      */
  37.     public UrlReplaceDownload() {
  38.         savePath = "." + File.separatorChar;
  39.     }
  40.  
  41.     /**
  42.      * Constructor with saving directory path parameter
  43.      * @param    path Set saving directory path
  44.      * @return    void
  45.      */
  46.     public UrlReplaceDownload(String path) {
  47.         savePath = path;
  48.     }
  49.  
  50.     /**
  51.      * Set saving directory path
  52.      * @param    path Set saving directory path
  53.      * @return    void
  54.      */
  55.     public void setSavePath(String path) {
  56.         savePath = path;
  57.     }
  58.  
  59.     /**
  60.      * Download URL from list and save to local disk
  61.      * @param    urlList URL and saving filename list
  62.      * @return    int the number of downloaded count
  63.      * @see UrlList
  64.      */
  65.     public int replaceDownload(UrlList urlList) throws IOException {
  66.         BufferedOutputStream    bos = null;
  67.         BufferedInputStream        bis = null;
  68.         HttpURLConnection        http = null;
  69.  
  70.         int downloadedCount = 0;
  71.         ResourceBundle rb = ResourceBundle.getBundle("PageGenerator");
  72.         print(rb.getString("urlCount") + urlList.count);
  73.         for (int i = 0; i < urlList.count; i++) {
  74.             try {
  75.                 String outputFilePath = savePath + urlList.filePath[i] + urlList.file[i];
  76.  
  77.                 File f = new File(savePath + urlList.filePath[i]);
  78.                 f.mkdirs();
  79.  
  80.                 bos = new BufferedOutputStream(new FileOutputStream(outputFilePath));
  81.                 URL url = new URL(urlList.url[i].url);
  82.                 print("URL = " + urlList.url[i].url);
  83.                 print("    =>" + outputFilePath);
  84.  
  85.                 http = (HttpURLConnection)url.openConnection();
  86.                 bis = new BufferedInputStream(http.getInputStream());
  87.  
  88.                 byte[] b = new byte[32768];
  89.                 int size;
  90.                 String fname = urlList.file[i].toUpperCase();
  91.                 if (fname.indexOf(".HTM") == -1) {            // non-HTML page
  92.                     while ((size = bis.read(b)) != -1) {
  93.                         bos.write(b, 0, size);
  94.                     }
  95.                 } else {                                    // HTML page
  96.                     String buf = "";
  97.                     while ((size = bis.read(b)) != -1) {
  98.                         buf += new String(b, 0, size);
  99.                     }
  100.                     String replaced = replace(buf, urlList, i);
  101.                     bos.write(replaced.getBytes());
  102.                 }
  103.                 bos.flush();
  104.                 downloadedCount++;
  105.             } catch(Exception e) {
  106.                 if (http != null) {
  107.                     int errCode = -1;
  108.                     String errMsg = rb.getString("serverDidNotRespond");
  109.                     try {
  110.                         errCode = http.getResponseCode();
  111.                         errMsg = http.getResponseMessage();
  112.                     }
  113.                     catch (Exception xe) {}
  114.                     System.err.println(rb.getString("httpError") + errCode + " " + errMsg);
  115.                 } else {
  116.                     e.printStackTrace();
  117.                 }
  118.             } finally {
  119.                 try {
  120.                     if (bos != null)    bos.close();
  121.                 } catch(Exception e) {
  122.                     e.printStackTrace();
  123.                 }
  124.                 try {
  125.                     if (bis != null)    bis.close();
  126.                 } catch(Exception e) {
  127.                     e.printStackTrace();
  128.                 }
  129.                 try {
  130.                     if (http != null)    http.disconnect();
  131.                 } catch(Exception e) {
  132.                     e.printStackTrace();
  133.                 }
  134.                 bos  = null;
  135.                 bis  = null;
  136.                 http = null;
  137.             }
  138.         }
  139.         return downloadedCount;
  140.     }
  141.  
  142.     /**
  143.      * replace URL to filename
  144.      * @param    html Downloaded HTML content
  145.      * @param    urlList UrlList
  146.      * @param    listIndex Current downloading index in UrlList
  147.      * @return    Replaced HTML content string
  148.      * @see UrlList
  149.      */
  150.     private String replace(String html, UrlList urlList, int listIndex) {
  151.         int index = 0;
  152.         int hrefIndex = 0;
  153.         int startUrlIndex = 0;
  154.         int endQuoteIndex = 0;
  155.  
  156.         RE reQuoted = null;
  157.         
  158.         String href = "href=";
  159.         int hrefLen = href.length();
  160.         
  161.         for (index = 0; index < html.length() - hrefLen; index++) {
  162.             String comp = html.substring(index, index+hrefLen);
  163.             if (0 != href.compareToIgnoreCase(comp))    continue;
  164.             int    quotation = 0;
  165.             switch (html.charAt(index+hrefLen)) {
  166.                 case '\'':
  167.                     quotation = 1;
  168.                     startUrlIndex = index + hrefLen + 1;
  169.                     endQuoteIndex = html.indexOf('\'', startUrlIndex);
  170.                     break;
  171.                 case '\"':
  172.                     quotation = 2;
  173.                     startUrlIndex = index + hrefLen + 1;
  174.                     endQuoteIndex = html.indexOf('\"', startUrlIndex);
  175.                     break;
  176.                 default:
  177.                     quotation = 0;
  178.                     startUrlIndex = index + hrefLen;
  179.                     int candidate = 0;
  180.                     int temp = 0;
  181.                     if (-1 != (temp = html.indexOf(' ', startUrlIndex))) {
  182.                         candidate = temp;
  183.                     }
  184.                     if (-1 != (temp = html.indexOf('>', startUrlIndex))) {
  185.                         candidate = Math.min(candidate, temp);
  186.                     }
  187.                     endQuoteIndex = -1;
  188.                     if (candidate != 0) {    // check if there is wrong char mixed
  189.                         if (-1 != (temp = html.indexOf('<', startUrlIndex))) {
  190.                             if (temp < candidate)    break;
  191.                         }
  192.                         if (-1 != (temp = html.indexOf('\'', startUrlIndex))) {
  193.                             if (temp < candidate)    break;
  194.                         }
  195.                         if (-1 != (temp = html.indexOf('\"', startUrlIndex))) {
  196.                             if (temp < candidate)    break;
  197.                         }
  198.                         endQuoteIndex = candidate;    // it passed all check now
  199.                     }
  200.                     break;
  201.             }
  202.             if (endQuoteIndex == -1) {    // quote ending was not found
  203.                 ResourceBundle rb = ResourceBundle.getBundle("PageGenerator");
  204.                 System.err.println(rb.getString("endQuoteNotFound"));
  205.                 continue;
  206.             }
  207.             String orgUrl = html.substring(startUrlIndex, endQuoteIndex);
  208.             if (orgUrl.length() < 5) {    // href="URL": URL is too short.
  209.                 continue;
  210.             }
  211.             
  212.             String replaceFilePath = urlList.getReplaceFilePath(listIndex, orgUrl);
  213.             if (replaceFilePath == "") {
  214.                 // No replacing file path found"
  215.                 continue;
  216.             }
  217.             String newUrl = "\"" + replaceFilePath + "\"";
  218.             try {
  219.                 if (quotation == 1)    reQuoted  = REUtil.createRE("\'" + orgUrl  + "\'", RE.MATCH_MULTILINE);
  220.                 else                reQuoted  = REUtil.createRE("\"" + orgUrl  + "\"", RE.MATCH_MULTILINE);
  221.             } catch (RESyntaxException e) {
  222.                 e.printStackTrace();
  223.             }
  224.             html = reQuoted.subst(html, newUrl);
  225.         }
  226.         return html;
  227.     }
  228.  
  229.     /**
  230.      * Display string
  231.      * @param    s String to display
  232.      * @return    void
  233.      */
  234.     private static void print(String s) {
  235.         System.out.println(s);
  236.     }
  237. }
  238.